**********************************************************************
*
* Economic hardship triggers identification with disadvantaged minorities
* Simonovits and Kezdi
* DESCRIBE SAMPLE
*
* 2015
*
* NOTE: First lines of table A1 require restricted use data
* 	that are not provided with the submitted article
*
**********************************************************************

* Project paths.
* ROOT defaults to the original author's folder. To run the script elsewhere,
* define the global ROOT before calling this do-file; it is only set here
* when it is still empty.
if "$ROOT"=="" global ROOT c:\Users\gabor\Dropbox
global NABC $ROOT\4_Data\Kompetencia_KIRSTAT\04_Tanulo
* NOTE(review): EPDATA is defined but the visible script spells out
* $ROOT\4_Data\Eletpalya directly instead of using it.
global EPDATA $ROOT\4_Data\Eletpalya
global IDENTDATA $ROOT\1_Research\Simonovits_Kezdi\revise_and_resubmit\data

* Scratch files saved without a path (frame_temp, oscore, temp, incpc)
* are written to this working directory.
cd "$ROOT\1_Research\Simonovits_Kezdi\revise_and_resubmit\analysis"


**********************************************************************
** TABLE A1 
****************************

** Sampling frame
* Load the listed variables from $NABC\2006_tanuloi_8.dta, discarding
* whatever is currently in memory.
use azon teltip* ksh4_tan ksh4_th t2 t28 t29 t30 t31 o_zpsc fuzet valid sni ///
	using $NABC\2006_tanuloi_8.dta, clear

* Rescaled score: o_zpsc shifted by 500 and divided by 100.
generate oscore = (o_zpsc - 500) / 100
compress
save frame_temp, replace

* Also store azon together with the rescaled score as a separate file.
keep azon oscore
save oscore, replace

** merge income per capita in town/village 
* Builds incpc.dta: one row per tazon holding incpc = tx02 / de01, both taken
* from the ev==2006 rows of the Tstar files.
* temp.dta is only a scratch file here; later sections overwrite it.
* -clear- is the documented option for replacing the data in memory with -use-.

* Step 1: de01 from de.dta, renamed pop.
use $ROOT\4_Data\Tstar\de.dta, clear
keep if ev==2006
keep tazon de01
rename de01 pop
save temp,replace

* Step 2: tx02 from tx.dta, joined to pop on tazon.
use $ROOT\4_Data\Tstar\tx.dta, clear
keep if ev==2006
keep tazon tx02
* Unmatched rows from either side are kept; incpc is then missing for them.
merge 1:1 tazon using temp, nogen
gen incpc=tx02/pop
keep tazon incpc
save incpc,replace

* Attach incpc to the sampling frame in two passes over the same lookup file.
use frame_temp

* Pass 1: key the lookup on the code stored in ksh4_tan.
generate tazon = ksh4_tan
merge m:1 tazon using incpc, keep(master match) nogenerate

* Pass 2: re-key on ksh4_th. With -update-, incpc is only filled in where it
* is still missing after pass 1; existing values are left untouched.
replace tazon = ksh4_th
merge m:1 tazon using incpc, keep(master match match_update match_conflict) update nogenerate

****************************
** line 1: Total Pop
* All rows of the sampling frame.
count 
tabstat incpc
****************************
* line 2: NABC test taker
* Rows with a non-missing rescaled score.
count if oscore!=.
tabstat incpc oscore if oscore!=.
****************************
* line 3: NABC family background qnaire
* empparent counts how many of t30 and t31 are coded <=3 (0, 1 or 2).
* When t30 is missing it falls back on t31 alone: 1 if t31<=3, 0 if t31 is 5-9.
* NOTE(review): with t30 missing, t31==4 and t31>9 leave empparent missing,
* whereas any non-missing t30>3 counts as 0 - confirm this asymmetry is intended.
gen empparent=t30<=3 if t30!=.
 replace empparent=empparent+1 if t31<=3
 replace empparent=1 if empparent==. & t31<=3
 replace empparent=0 if empparent==. & t31>4 & t31<=9
* lowedmo: indicator for t28<=2, missing when t28 is missing.
gen lowedmo=t28<=2 if t28!=.
count if t2!=.
* Summarise the same sample that is counted above (t2 non-missing). The
* original reused the line-2 condition (oscore!=.) here, so the count and
* the means described different samples.
tabstat incpc oscore empparent lowedmo if t2!=.
* Scratch file picked up by the next section.
save temp,replace
****************************
* line 4: Agreed to participate in HLCS
* Every azon listed in EP_samplingframe is flagged agreed==1, then joined to
* the frame-level variables saved in temp.dta. Unmatched rows from either
* side are kept by the merge.
use $ROOT\4_Data\Eletpalya\EP_samplingframe, clear
keep azon 
gen byte agreed=1
merge 1:1 azon using temp, nogen
count if agreed==1
tabstat incpc oscore empparent lowedmo if agreed==1

* Reduce temp.dta to the azon-level variables that are merged onto the
* identification workfile below. (The original saved temp.dta once more just
* before this step; that copy was overwritten immediately, so it is dropped.
* A stray comment terminator without a matching opener was also removed.)
keep azon incpc oscore empparent lowedmo 
save temp,replace

****************************
* line 5: Baseline HLCS sample
use $IDENTDATA\identification_workfile.dta, clear
* Bring in incpc, oscore, empparent and lowedmo by azon; workfile rows
* without a match in temp.dta are kept.
merge m:1 azon using temp , keep(1 3) nogen

* hardship_a: the wave-"a" value of hardship, spread to every row of the
* same sorszam (mean over the wave-"a" rows of that sorszam).
gen temp=hardship if wave=="a"
 egen hardship_a=mean(temp), by(sorszam)
 drop temp
 tab wave hardship_a,mis

count if wave=="a"
* tabstat treats [w=] as analytic weights; spelled out here.
tabstat incpc oscore empparent lowedmo descent_roma hardship_a [aweight=suly] if wave=="a"


****************************
* lines 6, 7 & 8: Analysis HLCS sample
* Keep waves b, d, e and f only.
keep if inlist(wave,"b","d","e","f")
rename tazon_a tazon
* Without -update-, variables already in memory keep their values, so this
* merge only adds incpc/oscore/empparent/lowedmo if they are not yet present.
merge m:1 azon using temp , keep(1 3) nogen

* Row-level flag: 1 when descent_roma, roma and hardship are all non-missing.
gen nomis_descent=descent_roma!=.
gen nomis_roma=roma!=.
gen nomis_hardship=hardship!=.
gen nomis_keyvars=nomis_descent*nomis_roma*nomis_hardship

* balanced: 1 when all four waves of a sorszam have the flag set, else 0.
* total() is the documented egen function; sum() is its legacy name.
egen balanced=total(nomis_keyvars), by(sorszam)
 replace balanced=(balanced==4)

* Declare the panel: sorszam is the unit, the encoded wave the time index.
encode wave, gen(wavenum)
xtset sorszam wavenum

* line 6
count if balanced==1 & wave=="f"
tabstat incpc oscore empparent lowedmo descent_roma hardship_a [aweight=suly] if balanced==1 & wave=="f"

* line 7
* NOTE(review): unlike lines 6 and 8 this uses wave "b" and no balanced
* restriction - confirm this is the intended sample.
xtsum roma if descent_roma==1
tabstat incpc oscore empparent lowedmo descent_roma hardship_a [aweight=suly] if descent_roma==1 & wave=="b"

* line 8
count if balanced==1 & descent_roma==1 & wave=="f"
tabstat incpc oscore empparent lowedmo descent_roma hardship_a [aweight=suly] if balanced==1 & descent_roma==1 & wave=="f"


** footnote: distribution of other ethnic identities
use $ROOT\4_Data\Eletpalya\EP_national_ethnic, clear
keep sorszam b222 b223 d79 d80 e154 e155 f152 f153 
* Rename to etni1<wave>/etni2<wave> so that -reshape- can split the wave
* letter (b, d, e, f) off the end of each name.
rename b222 etni1b 
rename b223 etni2b 
rename d79 etni1d 
rename d80 etni2d 
rename e154 etni1e 
rename e155 etni2e 
rename f152 etni1f 
rename f153 etni2f 
reshape long etni1 etni2, i(sorszam) j(wave) string
* The original merged with a file called ident_workfile in the working
* directory, which nothing in this script creates. The merge now uses the
* identification workfile that the Table A1 section loads.
* NOTE(review): confirm that this is the intended file and that it contains
* roma, hun and suly.
merge 1:1 sorszam wave using $IDENTDATA\identification_workfile.dta, keep(3) nogen
* etni_other: either answer coded 2-6 or 9.
gen etni_other=(etni1>1 & etni1<7) | etni1==9 | (etni2>1 & etni2<7) | etni2==9 
* <x>_ever: 1 if x sums to at least 1 over the rows of a sorszam, else 0.
* total() is the documented egen function; sum() is its legacy name.
foreach x in etni_other roma hun {
	egen `x'_ever=total(`x'), by(sorszam)
	replace `x'_ever=1 if `x'_ever>1 & `x'_ever<.
}
* tabstat treats [w=] as analytic weights; spelled out here.
tabstat etni_other_ever roma_ever hun_ever [aweight=suly]

